library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at /Users/ivy/Desktop/Modeling/LowFrequency/data
library(fs)
library(writexl)
library(readxl)
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(broom)
library(emmeans)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(superb)
library(pwr)
# Three sets of data:
# Three models:
# model 3 * train_filter_cond 2 * type_name 3 * preepoch_cond 17 * run 20 * epoch 121 = 740520 entries
# Load the raw accuracy log, reshape the three per-dataset accuracy columns
# into long form, recode legacy labels, and fix factor level order for plots.
# NOTE(review): here() is given an absolute path, which defeats its purpose;
# consider here("accstage-0905160507.csv") relative to the project root.
accstage <-
  read_csv(here("/Users/ivy/Desktop/Modeling/LowFrequency/data/accstage-0905160507.csv")) %>%
  rename(model = model_cond) %>%
  pivot_longer(cols = c("train_acc", "target_valid_acc", "full_valid_acc"),
               names_to = "type_name", values_to = "acc") %>%
  # harmonize legacy labels: "large" -> "cnn"; "lf" -> "low", anything else -> "high"
  mutate(model = if_else(model == "large", "cnn", model),
         train_filter_cond = if_else(train_filter_cond == "lf", "low", "high"),
         type_name = case_when(type_name == "train_acc" ~ "train",
                               type_name == "target_valid_acc" ~ "target testing",
                               type_name == "full_valid_acc" ~ "full testing")) %>%
  # factor() coerces its input itself, so the former factor(as.factor(.)) was redundant
  mutate(preepoch_cond = factor(preepoch_cond,
                                levels = c("0", "1", "2", "3", "4", "5", "10", "15", "20", "25", "30", "35", "40", "45", "50", "55", "60")),
         model = factor(model, levels = c("lstm", "cnn", "reslin")),
         train_filter_cond = factor(train_filter_cond, levels = c("low", "high")),
         type_name = factor(type_name, levels = c("train", "target testing", "full testing"))) %>%
  select(model, train_filter_cond, type_name, preepoch_cond, run, epoch, acc)
## Rows: 246840 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): model_cond, train_filter_cond, valid_phone_cond
## dbl (6): run, preepoch_cond, epoch, train_acc, target_valid_acc, full_valid_acc
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
accstage
# model 3 * train_filter_cond 2 * type_name 2 * run 20 = 240 entries
# Epoch-to-epoch accuracy gains over the last five epochs of Stage I
# (condition 60), evaluated at the final Stage-I epoch (59).
accstage_slope_60 <-
  accstage %>%
  # focus on validation sets in Stage I of condition 60
  filter(preepoch_cond == 60,
         epoch <= 60 - 1,
         type_name != "train") %>%
  group_by(model, train_filter_cond, type_name, run) %>%
  # acc_inc_k is the gain between epochs (t - k) and (t - k + 1);
  # lag(acc, k - 1) - lag(acc, k) equals the k-times-lagged first difference
  mutate(acc_inc   = acc - lag(acc),
         acc_inc_2 = lag(acc, 1) - lag(acc, 2),
         acc_inc_3 = lag(acc, 2) - lag(acc, 3),
         acc_inc_4 = lag(acc, 3) - lag(acc, 4),
         acc_inc_5 = lag(acc, 4) - lag(acc, 5)) %>%
  filter(epoch == 59)
accstage_slope_60
# Exp1 stabilizing-slope threshold (absolute per-epoch accuracy change)
slope_1 <- 0.03
# model 3 * train_filter_cond 2 * type_name 2 * run 20 = 240 entries
# Epoch-to-epoch accuracy gains near the end of Stage II for condition 0,
# evaluated at the final epoch (119).
accstage_slope_0 <-
  accstage %>%
  # focus on validation sets in Stage II of condition 0
  filter(preepoch_cond == 0,
         type_name != "train") %>%
  group_by(model, train_filter_cond, type_name, run) %>%
  # acc_inc_k is the gain between epochs (t - k) and (t - k + 1)
  mutate(acc_inc   = acc - lag(acc),
         acc_inc_2 = lag(acc, 1) - lag(acc, 2),
         acc_inc_3 = lag(acc, 2) - lag(acc, 3),
         acc_inc_4 = lag(acc, 3) - lag(acc, 4),
         acc_inc_5 = lag(acc, 4) - lag(acc, 5)) %>%
  filter(epoch == 119)
accstage_slope_0
# Exp2 stabilizing-slope threshold (absolute per-epoch accuracy change)
slope_2 <- 0.01
# Two training stages:
# Three check points:
# Analysis checkpoints, expressed in epochs
first_stage_end <- 15   # end of Stage I ("before birth")
second_stage_mid <- 20  # shortly into Stage II ("short period after birth")
second_stage_end <- 30  # end of Stage II ("when learning completes")
average_epoch <- 3      # number of consecutive epochs averaged at each checkpoint
# model 3 * train_filter_cond 3 * type_name 3 * run 20 * epoch 31 = 16740 entries
# Stage-aligned subset: condition-15 runs, plus the low-pass condition-0 runs
# relabelled as the "full" baseline, truncated to the first second_stage_end epochs.
accstage_15 <-
  accstage %>%
  filter(preepoch_cond == first_stage_end |
           (preepoch_cond == 0 & train_filter_cond == "low"),
         epoch <= second_stage_end - 1) %>%
  # condition-0 runs act as the unfiltered "full" baseline; coerce the factor
  # to character explicitly rather than relying on if_else()'s implicit
  # factor/character common-type coercion
  mutate(train_filter_cond = if_else(preepoch_cond == 0, "full",
                                     as.character(train_filter_cond)),
         # factor() coerces directly; the former factor(as.factor(.)) was redundant
         train_filter_cond = factor(train_filter_cond,
                                    levels = c("low", "high", "full"))) %>%
  select(-preepoch_cond)
accstage_15
# model 3 (grid_y) * train_filter_cond 3 (grid_x) * type_name 3 (line) * epoch 31 (x) = 837 entries
# Per-cell mean accuracy with a 95% t-based confidence interval.
dataset_sum <-
  accstage_15 %>%
  group_by(model, train_filter_cond, type_name, epoch) %>%
  summarise(mean = mean(acc),
            n = n(),               # idiomatic dplyr row count (was length(acc))
            sd = sd(acc),
            se = sd / sqrt(n),
            t = qt(0.975, n - 1),  # two-sided 95% critical value (was (0.95)/2 + 0.5)
            ci_min = mean - t * se,
            ci_max = mean + t * se)
## `summarise()` has grouped output by 'model', 'train_filter_cond', 'type_name'.
## You can override using the `.groups` argument.
dataset_sum
# Learning curves per dataset type, faceted model (rows) x condition (cols);
# ribbons show the 95% CI around each curve.
dataset_learning_plot <-
  ggplot(dataset_sum, aes(x = epoch)) +
  geom_line(aes(y = mean, color = type_name)) +
  geom_ribbon(aes(ymin = ci_min, ymax = ci_max, fill = type_name), alpha = 0.25) +
  facet_grid(model ~ train_filter_cond) +
  scale_x_continuous(breaks = seq(0, 60, by = 2)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1)) +
  labs(x = "epochs", y = "accuracy rates", color = "dataset", fill = "dataset")
dataset_learning_plot
# model 3 * train_filter_cond 2 * type_name 3 * run 20 * epoch 3 = 1080 entries
# The average_epoch epochs just before the end of Stage I (epochs 12-14).
accstage_1214 <-
  accstage_15 %>%
  filter(train_filter_cond != "full",
         between(epoch, first_stage_end - average_epoch, first_stage_end - 1))
accstage_1214
# model 3 * train_filter_cond 2 * type_name 3 = 18 entries
# End-of-Stage-I checkpoint summary: mean accuracy and 95% t-based CI per
# cell, plus a formatted percentage label and an x position for plotting.
sum_1214 <-
  accstage_1214 %>%
  group_by(train_filter_cond, type_name, model) %>%
  summarise(mean = mean(acc),
            show_mean = paste0(format(round(100 * mean, digits = 2), nsmall = 2), "%"),
            n = n(),               # idiomatic dplyr row count (was length(acc))
            sd = sd(acc),
            se = sd / sqrt(n),
            t = qt(0.975, n - 1),  # two-sided 95% critical value
            ci_min = mean - t * se,
            ci_max = mean + t * se) %>%
  # x positions used to overlay these bars on the learning-curve figure
  mutate(epoch = case_when(train_filter_cond == "low" ~ 12.5,
                           train_filter_cond == "high" ~ 13.5))
## `summarise()` has grouped output by 'train_filter_cond', 'type_name'. You can
## override using the `.groups` argument.
sum_1214
# End-of-Stage-I accuracy modelled by condition x dataset type x architecture
# (fixed effects only; one observation per run x epoch).
mdl_1214 <-
  lm(acc ~ train_filter_cond * type_name * model, data = accstage_1214)
summary(mdl_1214)
##
## Call:
## lm(formula = acc ~ train_filter_cond * type_name * model, data = accstage_1214)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.060058 -0.005384 -0.000033 0.006246 0.049565
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.340691 0.001620
## train_filter_condhigh -0.041951 0.002290
## type_nametarget testing -0.051806 0.002290
## type_namefull testing -0.255110 0.002290
## modelcnn -0.071622 0.002290
## modelreslin -0.111706 0.002290
## train_filter_condhigh:type_nametarget testing 0.012843 0.003239
## train_filter_condhigh:type_namefull testing 0.009751 0.003239
## train_filter_condhigh:modelcnn 0.027415 0.003239
## train_filter_condhigh:modelreslin 0.031788 0.003239
## type_nametarget testing:modelcnn 0.050460 0.003239
## type_namefull testing:modelcnn 0.138336 0.003239
## type_nametarget testing:modelreslin 0.043112 0.003239
## type_namefull testing:modelreslin 0.088379 0.003239
## train_filter_condhigh:type_nametarget testing:modelcnn -0.019620 0.004581
## train_filter_condhigh:type_namefull testing:modelcnn -0.069118 0.004581
## train_filter_condhigh:type_nametarget testing:modelreslin -0.026381 0.004581
## train_filter_condhigh:type_namefull testing:modelreslin -0.006063 0.004581
## t value Pr(>|t|)
## (Intercept) 210.364 < 2e-16 ***
## train_filter_condhigh -18.316 < 2e-16 ***
## type_nametarget testing -22.619 < 2e-16 ***
## type_namefull testing -111.384 < 2e-16 ***
## modelcnn -31.271 < 2e-16 ***
## modelreslin -48.772 < 2e-16 ***
## train_filter_condhigh:type_nametarget testing 3.965 7.83e-05 ***
## train_filter_condhigh:type_namefull testing 3.011 0.00267 **
## train_filter_condhigh:modelcnn 8.464 < 2e-16 ***
## train_filter_condhigh:modelreslin 9.814 < 2e-16 ***
## type_nametarget testing:modelcnn 15.579 < 2e-16 ***
## type_namefull testing:modelcnn 42.708 < 2e-16 ***
## type_nametarget testing:modelreslin 13.310 < 2e-16 ***
## type_namefull testing:modelreslin 27.285 < 2e-16 ***
## train_filter_condhigh:type_nametarget testing:modelcnn -4.283 2.01e-05 ***
## train_filter_condhigh:type_namefull testing:modelcnn -15.089 < 2e-16 ***
## train_filter_condhigh:type_nametarget testing:modelreslin -5.759 1.11e-08 ***
## train_filter_condhigh:type_namefull testing:modelreslin -1.324 0.18590
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01254 on 1062 degrees of freedom
## Multiple R-squared: 0.9816, Adjusted R-squared: 0.9813
## F-statistic: 3331 on 17 and 1062 DF, p-value: < 2.2e-16
# Post-hoc power analysis using the adjusted R^2 printed above (0.9813).
# NOTE(review): Cohen's f2 is conventionally R2 / (1 - R2); the extra
# (17 / 1062) = u / v factor here is nonstandard — confirm it is intentional.
f2 <- (0.9813 / (1 - 0.9813)) * (17 / 1062)
pwr.f2.test(u = 17, v = 1062, f2 = f2, sig.level = 0.05)
##
## Multiple regression power calculation
##
## u = 17
## v = 1062
## f2 = 0.8400103
## sig.level = 0.05
## power = 1
# Pairwise low-vs-high contrasts within each type_name x model cell;
# adjust = "mvt" applies the multivariate-t correction for the family of tests.
pairwise_1214 <-
emmeans(mdl_1214, pairwise ~ train_filter_cond | type_name * model, adjust = "mvt")
pairwise_1214
## $emmeans
## type_name = train, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.3407 0.00162 1062 0.3375 0.3439
## high 0.2987 0.00162 1062 0.2956 0.3019
##
## type_name = target testing, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.2889 0.00162 1062 0.2857 0.2921
## high 0.2598 0.00162 1062 0.2566 0.2630
##
## type_name = full testing, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.0856 0.00162 1062 0.0824 0.0888
## high 0.0534 0.00162 1062 0.0502 0.0566
##
## type_name = train, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.2691 0.00162 1062 0.2659 0.2722
## high 0.2545 0.00162 1062 0.2514 0.2577
##
## type_name = target testing, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.2677 0.00162 1062 0.2645 0.2709
## high 0.2464 0.00162 1062 0.2432 0.2496
##
## type_name = full testing, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.1523 0.00162 1062 0.1491 0.1555
## high 0.0784 0.00162 1062 0.0752 0.0816
##
## type_name = train, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.2290 0.00162 1062 0.2258 0.2322
## high 0.2188 0.00162 1062 0.2156 0.2220
##
## type_name = target testing, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.2203 0.00162 1062 0.2171 0.2235
## high 0.1966 0.00162 1062 0.1934 0.1998
##
## type_name = full testing, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.0623 0.00162 1062 0.0591 0.0654
## high 0.0558 0.00162 1062 0.0526 0.0590
##
## Confidence level used: 0.95
##
## $contrasts
## type_name = train, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.04195 0.00229 1062 18.316 <.0001
##
## type_name = target testing, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.02911 0.00229 1062 12.709 <.0001
##
## type_name = full testing, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.03220 0.00229 1062 14.058 <.0001
##
## type_name = train, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.01454 0.00229 1062 6.347 <.0001
##
## type_name = target testing, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.02131 0.00229 1062 9.305 <.0001
##
## type_name = full testing, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.07390 0.00229 1062 32.266 <.0001
##
## type_name = train, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.01016 0.00229 1062 4.437 <.0001
##
## type_name = target testing, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.02370 0.00229 1062 10.348 <.0001
##
## type_name = full testing, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.00647 0.00229 1062 2.827 0.0048
# Tidy the pairwise contrasts and attach significance stars.
# Fix: significance must be derived from the NUMERIC p-values before
# scales::pvalue() formats them into strings; the original computed the stars
# after formatting, so case_when silently compared characters to numbers.
pairwise_output_1214 <-
  pairwise_1214$contrasts %>%
  tidy() %>%
  select(type_name:contrast, estimate:p.value) %>%
  mutate(significance = case_when(p.value <= 0.001 ~ "***",
                                  p.value <= 0.01 ~ "**",
                                  p.value <= 0.05 ~ "*",
                                  p.value <= 0.1 ~ ".",
                                  TRUE ~ "N.S."),  # TRUE fallback also covers NA
         across(estimate:statistic, ~ round(.x, 2)),
         p.value = scales::pvalue(p.value),
         model = factor(model, levels = c("lstm", "cnn", "reslin")),
         type_name = factor(type_name, levels = c("train", "target testing", "full testing"))) %>%
  # attach the "low" condition means for positioning significance labels
  left_join(sum_1214 %>%
              filter(train_filter_cond == "low") %>%
              select(type_name:mean))
## Adding missing grouping variables: `train_filter_cond`
## Joining with `by = join_by(type_name, model)`
# Bar chart of end-of-Stage-I means with 95% CIs, faceted model x dataset type.
plot_1214 <-
  ggplot(data = sum_1214, aes(x = train_filter_cond, y = mean)) +
  geom_bar(stat = "identity", aes(fill = train_filter_cond)) +
  geom_errorbar(aes(ymax = ci_max, ymin = ci_min),
                width = 0.5, color = "grey50") +
  # map label/position inside aes() so rows stay aligned under faceting,
  # instead of passing external vectors (fragile if the data are reordered)
  geom_text(aes(y = mean + 0.05, label = show_mean),
            color = "grey50", size = 3) +
  # showSignificance(c(1,2), 0.5, 0, "",
  # segmentParams = list(color = "grey50"),
  # textParams = list(color = "grey50")) +
  # geom_text(x = 1.5, y = 0.55, label = sum_1214$significance,
  # color = "grey50", size = 3) +
  facet_grid(model ~ type_name) +
  scale_y_continuous(breaks = seq(0, 0.6, by = 0.2),
                     limits = c(0, 0.6)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(x = "condition", y = "accuracy rates", fill = "condition") +
  theme(legend.position = "None")
plot_1214
# model 3 * train_filter_cond 2 * type_name 3 * run 20 * epoch 3 = 1080 entries
# The average_epoch epochs just before the mid-Stage-II checkpoint (epochs 17-19).
accstage_1719 <-
  accstage_15 %>%
  filter(train_filter_cond != "full",
         between(epoch, second_stage_mid - average_epoch, second_stage_mid - 1))
accstage_1719
# model 3 * train_filter_cond 2 * type_name 3 = 18 entries
# Mid-Stage-II checkpoint summary: mean accuracy and 95% t-based CI per cell,
# plus a formatted percentage label and an x position for plotting.
sum_1719 <-
  accstage_1719 %>%
  group_by(train_filter_cond, type_name, model) %>%
  summarise(mean = mean(acc),
            show_mean = paste0(format(round(100 * mean, digits = 2), nsmall = 2), "%"),
            n = n(),               # idiomatic dplyr row count (was length(acc))
            sd = sd(acc),
            se = sd / sqrt(n),
            t = qt(0.975, n - 1),  # two-sided 95% critical value
            ci_min = mean - t * se,
            ci_max = mean + t * se) %>%
  # x positions used to overlay these bars on the learning-curve figure
  mutate(epoch = case_when(train_filter_cond == "low" ~ 17.5,
                           train_filter_cond == "high" ~ 18.5))
## `summarise()` has grouped output by 'train_filter_cond', 'type_name'. You can
## override using the `.groups` argument.
sum_1719
# Mid-Stage-II accuracy with a per-run random intercept to absorb
# run-to-run variation (one observation per run x epoch).
mdl_1719 <-
  lmer(acc ~ train_filter_cond * type_name * model + (1 | run), data = accstage_1719)
summary(mdl_1719)
## Linear mixed model fit by REML ['lmerMod']
## Formula: acc ~ train_filter_cond * type_name * model + (1 | run)
## Data: accstage_1719
##
## REML criterion at convergence: -5450.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6614 -0.7131 0.0897 0.7288 2.7931
##
## Random effects:
## Groups Name Variance Std.Dev.
## run (Intercept) 3.373e-05 0.005808
## Residual 3.115e-04 0.017650
## Number of obs: 1080, groups: run, 20
##
## Fixed effects:
## Estimate Std. Error
## (Intercept) 0.597099 0.002623
## train_filter_condhigh -0.047309 0.003222
## type_nametarget testing -0.044882 0.003222
## type_namefull testing -0.044882 0.003222
## modelcnn -0.071307 0.003222
## modelreslin -0.196829 0.003222
## train_filter_condhigh:type_nametarget testing 0.017053 0.004557
## train_filter_condhigh:type_namefull testing 0.017053 0.004557
## train_filter_condhigh:modelcnn 0.036899 0.004557
## train_filter_condhigh:modelreslin 0.040948 0.004557
## type_nametarget testing:modelcnn 0.079687 0.004557
## type_namefull testing:modelcnn 0.079687 0.004557
## type_nametarget testing:modelreslin 0.059575 0.004557
## type_namefull testing:modelreslin 0.059575 0.004557
## train_filter_condhigh:type_nametarget testing:modelcnn -0.016664 0.006445
## train_filter_condhigh:type_namefull testing:modelcnn -0.016664 0.006445
## train_filter_condhigh:type_nametarget testing:modelreslin -0.021138 0.006445
## train_filter_condhigh:type_namefull testing:modelreslin -0.021138 0.006445
## t value
## (Intercept) 227.666
## train_filter_condhigh -14.681
## type_nametarget testing -13.928
## type_namefull testing -13.928
## modelcnn -22.128
## modelreslin -61.081
## train_filter_condhigh:type_nametarget testing 3.742
## train_filter_condhigh:type_namefull testing 3.742
## train_filter_condhigh:modelcnn 8.097
## train_filter_condhigh:modelreslin 8.985
## type_nametarget testing:modelcnn 17.486
## type_namefull testing:modelcnn 17.486
## type_nametarget testing:modelreslin 13.073
## type_namefull testing:modelreslin 13.073
## train_filter_condhigh:type_nametarget testing:modelcnn -2.586
## train_filter_condhigh:type_namefull testing:modelcnn -2.586
## train_filter_condhigh:type_nametarget testing:modelreslin -3.280
## train_filter_condhigh:type_namefull testing:modelreslin -3.280
##
## Correlation matrix not shown by default, as p = 18 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
# Post-hoc power analysis for the mid-Stage-II model.
# NOTE(review): mdl_1719 is a mixed model (lmer) whose summary reports no R^2;
# the hard-coded 0.9325 and the lm-style dfs (u = 17, v = 1062) are carried
# over from the fixed-effects analysis — confirm their provenance.
# NOTE(review): Cohen's f2 is conventionally R2 / (1 - R2); the extra
# (17 / 1062) factor is nonstandard.
f2 <- (0.9325 / (1 - 0.9325)) * (17 / 1062)
pwr.f2.test(u = 17, v = 1062, f2 = f2, sig.level = 0.05)
##
## Multiple regression power calculation
##
## u = 17
## v = 1062
## f2 = 0.2211411
## sig.level = 0.05
## power = 1
# Pairwise low-vs-high contrasts within each type_name x model cell on the
# mixed model; adjust = "mvt" applies the multivariate-t correction
# (Kenward-Roger degrees of freedom, per the output below).
pairwise_1719 <-
emmeans(mdl_1719, pairwise ~ train_filter_cond | type_name * model, adjust = "mvt")
pairwise_1719
## $emmeans
## type_name = train, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.597 0.00262 207 0.592 0.602
## high 0.550 0.00262 207 0.545 0.555
##
## type_name = target testing, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.552 0.00262 207 0.547 0.557
## high 0.522 0.00262 207 0.517 0.527
##
## type_name = full testing, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.552 0.00262 207 0.547 0.557
## high 0.522 0.00262 207 0.517 0.527
##
## type_name = train, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.526 0.00262 207 0.521 0.531
## high 0.515 0.00262 207 0.510 0.521
##
## type_name = target testing, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.561 0.00262 207 0.555 0.566
## high 0.551 0.00262 207 0.545 0.556
##
## type_name = full testing, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.561 0.00262 207 0.555 0.566
## high 0.551 0.00262 207 0.545 0.556
##
## type_name = train, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.400 0.00262 207 0.395 0.405
## high 0.394 0.00262 207 0.389 0.399
##
## type_name = target testing, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.415 0.00262 207 0.410 0.420
## high 0.405 0.00262 207 0.399 0.410
##
## type_name = full testing, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.415 0.00262 207 0.410 0.420
## high 0.405 0.00262 207 0.399 0.410
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## type_name = train, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.04731 0.00322 1043 14.681 <.0001
##
## type_name = target testing, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.03026 0.00322 1043 9.389 <.0001
##
## type_name = full testing, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.03026 0.00322 1043 9.389 <.0001
##
## type_name = train, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.01041 0.00322 1043 3.231 0.0013
##
## type_name = target testing, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.01002 0.00322 1043 3.110 0.0019
##
## type_name = full testing, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.01002 0.00322 1043 3.110 0.0019
##
## type_name = train, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.00636 0.00322 1043 1.974 0.0486
##
## type_name = target testing, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.01045 0.00322 1043 3.242 0.0012
##
## type_name = full testing, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.01045 0.00322 1043 3.242 0.0012
##
## Degrees-of-freedom method: kenward-roger
# Tidy the pairwise contrasts and attach significance stars.
# Fix: significance must be derived from the NUMERIC p-values before
# scales::pvalue() formats them into strings; the original computed the stars
# after formatting, so case_when silently compared characters to numbers.
pairwise_output_1719 <-
  pairwise_1719$contrasts %>%
  tidy() %>%
  select(type_name:contrast, estimate:p.value) %>%
  mutate(significance = case_when(p.value <= 0.001 ~ "***",
                                  p.value <= 0.01 ~ "**",
                                  p.value <= 0.05 ~ "*",
                                  p.value <= 0.1 ~ ".",
                                  TRUE ~ "N.S."),  # TRUE fallback also covers NA
         across(estimate:statistic, ~ round(.x, 2)),
         p.value = scales::pvalue(p.value),
         model = factor(model, levels = c("lstm", "cnn", "reslin")),
         type_name = factor(type_name, levels = c("train", "target testing", "full testing"))) %>%
  # attach the "low" condition means for positioning significance labels
  left_join(sum_1719 %>%
              filter(train_filter_cond == "low") %>%
              select(type_name:mean))
## Adding missing grouping variables: `train_filter_cond`
## Joining with `by = join_by(type_name, model)`
# Bar chart of mid-Stage-II means with 95% CIs, faceted model x dataset type.
plot_1719 <-
  ggplot(data = sum_1719, aes(x = train_filter_cond, y = mean)) +
  geom_bar(stat = "identity", aes(fill = train_filter_cond)) +
  geom_errorbar(aes(ymax = ci_max, ymin = ci_min),
                width = 0.5, color = "grey50") +
  # map label/position inside aes() so rows stay aligned under faceting,
  # instead of passing external vectors (fragile if the data are reordered)
  geom_text(aes(y = mean - 0.08, label = show_mean),
            color = "grey50", size = 3) +
  # showSignificance(c(1,2), 0.7, 0, "",
  # segmentParams = list(color = "grey50"),
  # textParams = list(color = "grey50")) +
  # geom_text(x = 1.5, y = 0.75, label = sum_1719$significance,
  # color = "grey50", size = 3) +
  facet_grid(model ~ type_name) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
  coord_cartesian(ylim = c(0, 1)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(x = "condition", y = "accuracy rates", fill = "condition") +
  theme(legend.position = "None")
plot_1719
# model 3 * train_filter_cond 2 * type_name 3 * run 20 * epoch 3 = 1080 entries
# The average_epoch epochs just before the end of Stage II (epochs 27-29).
accstage_2729 <-
  accstage_15 %>%
  filter(train_filter_cond != "full",
         between(epoch, second_stage_end - average_epoch, second_stage_end - 1))
accstage_2729
# model 3 * train_filter_cond 2 * type_name 3 = 18 entries
# End-of-Stage-II checkpoint summary: mean accuracy and 95% t-based CI per
# cell, plus a formatted percentage label and an x position for plotting.
sum_2729 <-
  accstage_2729 %>%
  group_by(train_filter_cond, type_name, model) %>%
  summarise(mean = mean(acc),
            show_mean = paste0(format(round(100 * mean, digits = 2), nsmall = 2), "%"),
            n = n(),               # idiomatic dplyr row count (was length(acc))
            sd = sd(acc),
            se = sd / sqrt(n),
            t = qt(0.975, n - 1),  # two-sided 95% critical value
            ci_min = mean - t * se,
            ci_max = mean + t * se) %>%
  # x positions used to overlay these bars on the learning-curve figure
  mutate(epoch = case_when(train_filter_cond == "low" ~ 27.5,
                           train_filter_cond == "high" ~ 28.5))
## `summarise()` has grouped output by 'train_filter_cond', 'type_name'. You can
## override using the `.groups` argument.
sum_2729
# End-of-Stage-II accuracy with a per-run random intercept to absorb
# run-to-run variation (one observation per run x epoch).
mdl_2729 <-
  lmer(acc ~ train_filter_cond * type_name * model + (1 | run), data = accstage_2729)
summary(mdl_2729)
## Linear mixed model fit by REML ['lmerMod']
## Formula: acc ~ train_filter_cond * type_name * model + (1 | run)
## Data: accstage_2729
##
## REML criterion at convergence: -6714
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8144 -0.6651 0.0178 0.6324 3.0816
##
## Random effects:
## Groups Name Variance Std.Dev.
## run (Intercept) 3.276e-05 0.005723
## Residual 9.299e-05 0.009643
## Number of obs: 1080, groups: run, 20
##
## Fixed effects:
## Estimate Std. Error
## (Intercept) 0.773537 0.001785
## train_filter_condhigh -0.052675 0.001761
## type_nametarget testing -0.182078 0.001761
## type_namefull testing -0.182078 0.001761
## modelcnn -0.151881 0.001761
## modelreslin -0.297977 0.001761
## train_filter_condhigh:type_nametarget testing 0.045787 0.002490
## train_filter_condhigh:type_namefull testing 0.045787 0.002490
## train_filter_condhigh:modelcnn 0.046110 0.002490
## train_filter_condhigh:modelreslin 0.048665 0.002490
## type_nametarget testing:modelcnn 0.172614 0.002490
## type_namefull testing:modelcnn 0.172614 0.002490
## type_nametarget testing:modelreslin 0.182180 0.002490
## type_namefull testing:modelreslin 0.182180 0.002490
## train_filter_condhigh:type_nametarget testing:modelcnn -0.041044 0.003521
## train_filter_condhigh:type_namefull testing:modelcnn -0.041044 0.003521
## train_filter_condhigh:type_nametarget testing:modelreslin -0.051198 0.003521
## train_filter_condhigh:type_namefull testing:modelreslin -0.051198 0.003521
## t value
## (Intercept) 433.25
## train_filter_condhigh -29.92
## type_nametarget testing -103.42
## type_namefull testing -103.42
## modelcnn -86.27
## modelreslin -169.24
## train_filter_condhigh:type_nametarget testing 18.39
## train_filter_condhigh:type_namefull testing 18.39
## train_filter_condhigh:modelcnn 18.52
## train_filter_condhigh:modelreslin 19.55
## type_nametarget testing:modelcnn 69.33
## type_namefull testing:modelcnn 69.33
## type_nametarget testing:modelreslin 73.17
## type_namefull testing:modelreslin 73.17
## train_filter_condhigh:type_nametarget testing:modelcnn -11.66
## train_filter_condhigh:type_namefull testing:modelcnn -11.66
## train_filter_condhigh:type_nametarget testing:modelreslin -14.54
## train_filter_condhigh:type_namefull testing:modelreslin -14.54
##
## Correlation matrix not shown by default, as p = 18 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
# Post-hoc power analysis for the end-of-Stage-II model.
# NOTE(review): mdl_2729 is a mixed model (lmer) whose summary reports no R^2;
# the hard-coded 0.9836 and the lm-style dfs (u = 17, v = 1062) are carried
# over from the fixed-effects analysis — confirm their provenance.
# NOTE(review): Cohen's f2 is conventionally R2 / (1 - R2); the extra
# (17 / 1062) factor is nonstandard.
f2 <- (0.9836 / (1 - 0.9836)) * (17 / 1062)
pwr.f2.test(u = 17, v = 1062, f2 = f2, sig.level = 0.05)
##
## Multiple regression power calculation
##
## u = 17
## v = 1062
## f2 = 0.9600615
## sig.level = 0.05
## power = 1
# Pairwise low-vs-high contrasts within each type_name x model cell on the
# mixed model; adjust = "mvt" applies the multivariate-t correction
# (Kenward-Roger degrees of freedom, per the output below).
pairwise_2729 <-
emmeans(mdl_2729, pairwise ~ train_filter_cond | type_name * model, adjust = "mvt")
pairwise_2729
## $emmeans
## type_name = train, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.7735 0.001785 64.12 0.7700 0.7771
## high 0.7209 0.001785 64.12 0.7173 0.7244
##
## type_name = target testing, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.5915 0.001785 64.12 0.5879 0.5950
## high 0.5846 0.001785 64.12 0.5810 0.5881
##
## type_name = full testing, model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.5915 0.001785 64.12 0.5879 0.5950
## high 0.5846 0.001785 64.12 0.5810 0.5881
##
## type_name = train, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.6217 0.001785 64.12 0.6181 0.6252
## high 0.6151 0.001785 64.12 0.6115 0.6187
##
## type_name = target testing, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.6122 0.001785 64.12 0.6086 0.6158
## high 0.6104 0.001785 64.12 0.6068 0.6139
##
## type_name = full testing, model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.6122 0.001785 64.12 0.6086 0.6158
## high 0.6104 0.001785 64.12 0.6068 0.6139
##
## type_name = train, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.4756 0.001785 64.12 0.4720 0.4791
## high 0.4716 0.001785 64.12 0.4680 0.4751
##
## type_name = target testing, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.4757 0.001785 64.12 0.4721 0.4792
## high 0.4662 0.001785 64.12 0.4627 0.4698
##
## type_name = full testing, model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## low 0.4757 0.001785 64.12 0.4721 0.4792
## high 0.4662 0.001785 64.12 0.4627 0.4698
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## type_name = train, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.05268 0.00176 1043 29.918 <.0001
##
## type_name = target testing, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.00689 0.00176 1043 3.912 0.0001
##
## type_name = full testing, model = lstm:
## contrast estimate SE df t.ratio p.value
## low - high 0.00689 0.00176 1043 3.912 0.0001
##
## type_name = train, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.00657 0.00176 1043 3.729 0.0002
##
## type_name = target testing, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.00182 0.00176 1043 1.035 0.3007
##
## type_name = full testing, model = cnn:
## contrast estimate SE df t.ratio p.value
## low - high 0.00182 0.00176 1043 1.035 0.3007
##
## type_name = train, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.00401 0.00176 1043 2.278 0.0230
##
## type_name = target testing, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.00942 0.00176 1043 5.351 <.0001
##
## type_name = full testing, model = reslin:
## contrast estimate SE df t.ratio p.value
## low - high 0.00942 0.00176 1043 5.351 <.0001
##
## Degrees-of-freedom method: kenward-roger
# Tidy the pairwise contrasts and attach significance stars.
# Fix: significance must be derived from the NUMERIC p-values before
# scales::pvalue() formats them into strings; the original computed the stars
# after formatting, so case_when silently compared characters to numbers.
pairwise_output_2729 <-
  pairwise_2729$contrasts %>%
  tidy() %>%
  select(type_name:contrast, estimate:p.value) %>%
  mutate(significance = case_when(p.value <= 0.001 ~ "***",
                                  p.value <= 0.01 ~ "**",
                                  p.value <= 0.05 ~ "*",
                                  p.value <= 0.1 ~ ".",
                                  TRUE ~ "N.S."),  # TRUE fallback also covers NA
         across(estimate:statistic, ~ round(.x, 2)),
         p.value = scales::pvalue(p.value),
         model = factor(model, levels = c("lstm", "cnn", "reslin")),
         type_name = factor(type_name, levels = c("train", "target testing", "full testing"))) %>%
  # attach the "low" condition means for positioning significance labels
  left_join(sum_2729 %>%
              filter(train_filter_cond == "low") %>%
              select(type_name:mean))
## Adding missing grouping variables: `train_filter_cond`
## Joining with `by = join_by(type_name, model)`
# Bar chart of end-of-Stage-II means with 95% CIs, faceted model x dataset type.
plot_2729 <-
  ggplot(data = sum_2729, aes(x = train_filter_cond, y = mean)) +
  geom_bar(stat = "identity", aes(fill = train_filter_cond)) +
  geom_errorbar(aes(ymax = ci_max, ymin = ci_min),
                width = 0.5, color = "grey50") +
  # map label/position inside aes() so rows stay aligned under faceting,
  # instead of passing external vectors (fragile if the data are reordered)
  geom_text(aes(y = mean - 0.08, label = show_mean),
            color = "grey50", size = 3) +
  # showSignificance(c(1,2), 0.9, 0, "",
  # segmentParams = list(color = "grey50"),
  # textParams = list(color = "grey50")) +
  # geom_text(x = 1.5, y = 0.95, label = sum_2729$significance,
  # color = "grey50", size = 3) +
  facet_grid(model ~ type_name) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
  coord_cartesian(ylim = c(0, 1)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(x = "condition", y = "accuracy rates", fill = "condition") +
  theme(legend.position = "None")
plot_2729
# model 3 (grid_y) * train_filter_cond 2 (line) * type_name 3 (grid_x) * epoch 31 (x) = 558 entries
# Learning-curve summary (mean + 95% CI) for the low vs high conditions only.
h_l_sum <-
  accstage_15 %>%
  filter(train_filter_cond != "full") %>%
  group_by(model, train_filter_cond, type_name, epoch) %>%
  summarise(mean = mean(acc),
            n = n(),               # idiomatic dplyr row count (was length(acc))
            sd = sd(acc),
            se = sd / sqrt(n),
            t = qt(0.975, n - 1),  # two-sided 95% critical value
            ci_min = mean - t * se,
            ci_max = mean + t * se)
## `summarise()` has grouped output by 'model', 'train_filter_cond', 'type_name'.
## You can override using the `.groups` argument.
h_l_sum
# Composite figure: low vs high learning curves with checkpoint bars
# (means + 95% CIs) and significance labels overlaid at the end of Stage I
# (epochs 12-14), mid Stage II (17-19), and end of Stage II (27-29).
# Layer order matters: curves first, then bars/errorbars/labels on top.
three_stages_plot <-
ggplot() +
# learning curve
geom_line(data = h_l_sum,
aes(x = epoch, y = mean, color = train_filter_cond)) +
geom_ribbon(data = h_l_sum,
aes(x = epoch, ymin = ci_min, ymax = ci_max, fill = train_filter_cond), alpha = 0.25) +
# end of first stage
geom_bar(data = sum_1214,
aes(x = epoch, y = mean, fill = train_filter_cond),
stat = "identity", position = "identity", width = 2.5) +
geom_errorbar(data = sum_1214,
aes(x = epoch, ymax = ci_max, ymin = ci_min),
width = 2, linewidth = 0.3, color = "grey50") +
# stars sit 0.05 above the "low" condition mean joined into pairwise_output_1214
geom_text(data = pairwise_output_1214,
aes(x = 13, y = mean + 0.05, label = significance),
color = "grey50", size = 3) +
# mid of second stage
geom_bar(data = sum_1719,
aes(x = epoch, y = mean, fill = train_filter_cond),
stat = "identity", position = "identity", width = 2.5) +
geom_errorbar(data = sum_1719,
aes(x = epoch, ymax = ci_max, ymin = ci_min),
width = 2, linewidth = 0.3, color = "grey50") +
geom_text(data = pairwise_output_1719,
aes(x = 18, y = mean + 0.05, label = significance),
color = "grey50", size = 3) +
# end of second stage
geom_bar(data = sum_2729,
aes(x = epoch, y = mean, fill = train_filter_cond),
stat = "identity", position = "identity", width = 2.5) +
geom_errorbar(data = sum_2729,
aes(x = epoch, ymax = ci_max, ymin = ci_min),
width = 2, linewidth = 0.3, color = "grey50") +
geom_text(data = pairwise_output_2729,
aes(x = 28, y = mean + 0.05, label = significance),
color = "grey50", size = 3) +
# stages
geom_text(data = data.frame(x = c(7.5, 22.5),
y = c(0.95, 0.95),
text = c("Stage I", "Stage II")),
aes(x, y, label = text),
color = "grey50", size = 3) +
# dotted vertical line marks the Stage I / Stage II boundary (epoch 15)
geom_vline(xintercept = 15, linetype = 3, color = "grey50") +
facet_grid(model ~ type_name) +
scale_x_continuous(breaks = seq(0, 30, by = 5)) +
scale_y_continuous(labels = scales::percent,
breaks = seq(0, 1, by = 0.2),
limits = c(0, 1)) +
scale_color_brewer(palette = "Pastel1") +
scale_fill_brewer(palette = "Pastel1") +
labs(x = "epochs", y = "accuracy rates", color = "condition", fill = "condition")
three_stages_plot
# model 3 * train_filter_cond 2 * type_name 2 * run 20 * epoch 16 = 3840 entries
# Epoch-to-epoch accuracy changes during stage I, per run.
# NOTE(review): lag() relies on rows being ordered by epoch within each
# model/condition/type/run group -- confirm the upstream data is sorted.
accstage_slope_15_stage_1 <-
  accstage_15 %>%
  # validation sets only, non-full conditions, stage I epochs
  filter(train_filter_cond != "full" &
           type_name != "train" &
           epoch <= first_stage_end - 1) %>%
  group_by(model, train_filter_cond, type_name, run) %>%
  # acc_inc_k is the one-epoch accuracy change observed k - 1 epochs earlier:
  # lag(acc, k - 1) - lag(acc, k)
  mutate(acc_inc = acc - lag(acc),
         acc_inc_2 = lag(acc, 1) - lag(acc, 2),
         acc_inc_3 = lag(acc, 2) - lag(acc, 3),
         acc_inc_4 = lag(acc, 3) - lag(acc, 4),
         acc_inc_5 = lag(acc, 4) - lag(acc, 5))
accstage_slope_15_stage_1
# model 3 * train_filter_cond 2 * type_name 2 * run 20 = 240 entries
# First stage I epoch at which each run's accuracy has stabilized: three
# consecutive one-epoch changes all within +/- slope_1 (threshold defined
# earlier in the file; the original comment quoted 0.03).
accstage_stable_15_stage_1 <-
  accstage_slope_15_stage_1 %>%
  group_by(model, train_filter_cond, type_name, run) %>%
  # keep epochs whose last three one-epoch changes all lie within the band;
  # abs() is equivalent to the paired -slope_1 <= x, x <= slope_1 checks
  filter(abs(acc_inc) <= slope_1,
         abs(acc_inc_2) <= slope_1,
         abs(acc_inc_3) <= slope_1) %>%
  # earliest stabilizing epoch per group; min() does not depend on row
  # order, unlike the original first()
  filter(epoch == min(epoch))
accstage_stable_15_stage_1
# model 3 * train_filter_cond 2 * type_name 2 = 12 entries
# Mean stage I stabilization epoch with 95% t-based confidence intervals.
stable_sum_15_stage_1 <-
  accstage_stable_15_stage_1 %>%
  group_by(model, train_filter_cond, type_name) %>%
  summarise(mean = mean(epoch),
            n = n(),
            sd = sd(epoch),
            se = sd / sqrt(n),
            # two-sided 95% critical t value with n - 1 df
            t = qt((0.95)/2 + 0.5, n-1),
            ci_min = mean - t * se,
            ci_max = mean + t * se,
            # "drop_last" is summarise()'s default grouping behaviour;
            # naming it silences the "has grouped output" message
            .groups = "drop_last")
stable_sum_15_stage_1
# model 3 * train_filter_cond 2 * type_name 2 * run 20 * epoch 15 = 3600 entries
# Epoch-to-epoch accuracy changes during stage II, per run.
# NOTE(review): lag() relies on rows being ordered by epoch within each
# group -- confirm the upstream data is sorted.
accstage_slope_15_stage_2 <-
  accstage_15 %>%
  # validation sets only, non-full conditions, stage II epochs
  filter(train_filter_cond != "full" &
           type_name != "train" &
           first_stage_end <= epoch &
           epoch <= second_stage_end - 1) %>%
  group_by(model, train_filter_cond, type_name, run) %>%
  # acc_inc_k is the one-epoch accuracy change observed k - 1 epochs earlier:
  # lag(acc, k - 1) - lag(acc, k)
  mutate(acc_inc = acc - lag(acc),
         acc_inc_2 = lag(acc, 1) - lag(acc, 2),
         acc_inc_3 = lag(acc, 2) - lag(acc, 3),
         acc_inc_4 = lag(acc, 3) - lag(acc, 4),
         acc_inc_5 = lag(acc, 4) - lag(acc, 5))
accstage_slope_15_stage_2
# model 3 * train_filter_cond 2 * type_name 2 * run 20 = 240 entries
# First stage II epoch at which each run's accuracy has stabilized.
# NOTE(review): the threshold variable is slope_1, the same as in stage I,
# yet the original comments quoted 0.03 (stage I) vs 0.02 (stage II) --
# confirm whether a separate stage II threshold was intended.
accstage_stable_15_stage_2 <-
  accstage_slope_15_stage_2 %>%
  group_by(model, train_filter_cond, type_name, run) %>%
  # keep epochs whose last three one-epoch changes all lie within the band;
  # abs() is equivalent to the paired -slope_1 <= x, x <= slope_1 checks
  filter(abs(acc_inc) <= slope_1,
         abs(acc_inc_2) <= slope_1,
         abs(acc_inc_3) <= slope_1) %>%
  # earliest stabilizing epoch per group; min() does not depend on row
  # order, unlike the original first()
  filter(epoch == min(epoch))
accstage_stable_15_stage_2
# model 3 * train_filter_cond 2 * type_name 2 = 12 entries
# Mean stage II stabilization epoch with 95% t-based confidence intervals.
stable_sum_15_stage_2 <-
  accstage_stable_15_stage_2 %>%
  group_by(model, train_filter_cond, type_name) %>%
  summarise(mean = mean(epoch),
            n = n(),
            sd = sd(epoch),
            se = sd / sqrt(n),
            # two-sided 95% critical t value with n - 1 df
            t = qt((0.95)/2 + 0.5, n-1),
            ci_min = mean - t * se,
            ci_max = mean + t * se,
            # "drop_last" is summarise()'s default grouping behaviour;
            # naming it silences the "has grouped output" message
            .groups = "drop_last")
stable_sum_15_stage_2
# model 3 * train_filter_cond 3 * run 20 * epoch 4 = 720 entries
# Full-testing accuracy over the last `average_epoch` epochs of stage II,
# plus the untrained network's accuracy (epoch == -1) relabeled as a
# fourth, "baseline" condition.
baseline <-
  accstage_15 %>%
  filter(type_name == "full testing",
         epoch == -1 |
           between(epoch, second_stage_end - average_epoch, second_stage_end - 1)) %>%
  mutate(train_filter_cond = if_else(epoch == -1, "baseline", train_filter_cond))
baseline
# model 3 * train_filter_cond 4 = 12 entries
# Mean accuracy per model x condition (including the relabeled "baseline"),
# with 95% t-based confidence intervals.
baseline_sum <-
  baseline %>%
  group_by(model, train_filter_cond) %>%
  summarise(mean = mean(acc),
            n = n(),
            sd = sd(acc),
            se = sd / sqrt(n),
            # two-sided 95% critical t value with n - 1 df
            t = qt((0.95)/2 + 0.5, n-1),
            ci_min = mean - t * se,
            ci_max = mean + t * se,
            # "drop_last" is summarise()'s default grouping behaviour;
            # naming it silences the "has grouped output" message
            .groups = "drop_last")
baseline_sum
# Mixed-effects model: accuracy predicted by filter condition x model
# architecture, with a random intercept per run.
# NOTE(review): each run contributes several epochs' observations per
# condition (see the baseline filter above); the model treats those as
# exchangeable within run -- confirm this is intended.
baseline_mdl <-
lmer(acc ~ train_filter_cond * model + (1 | run), data = baseline)
baseline_mdl %>% summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: acc ~ train_filter_cond * model + (1 | run)
## Data: baseline
##
## REML criterion at convergence: -4345
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6875 -0.6547 0.0255 0.5881 5.4451
##
## Random effects:
## Groups Name Variance Std.Dev.
## run (Intercept) 3.745e-05 0.00612
## Residual 1.102e-04 0.01050
## Number of obs: 720, groups: run, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.027004 0.001926 14.022
## train_filter_condfull 0.555656 0.001916 289.969
## train_filter_condhigh 0.557567 0.001916 290.966
## train_filter_condlow 0.564455 0.001916 294.561
## modelcnn 0.004211 0.001916 2.197
## modelreslin -0.001523 0.001916 -0.795
## train_filter_condfull:modelcnn 0.041755 0.002710 15.408
## train_filter_condhigh:modelcnn 0.021587 0.002710 7.966
## train_filter_condlow:modelcnn 0.016522 0.002710 6.097
## train_filter_condfull:modelreslin -0.083468 0.002710 -30.800
## train_filter_condhigh:modelreslin -0.116807 0.002710 -43.102
## train_filter_condlow:modelreslin -0.114274 0.002710 -42.168
##
## Correlation of Fixed Effects:
## (Intr) trn_fltr_cndf trn_fltr_cndh trn_fltr_cndl mdlcnn
## trn_fltr_cndf -0.498
## trn_fltr_cndh -0.498 0.500
## trn_fltr_cndl -0.498 0.500 0.500
## modelcnn -0.498 0.500 0.500 0.500
## modelreslin -0.498 0.500 0.500 0.500 0.500
## trn_fltr_cndfll:mdlc 0.352 -0.707 -0.354 -0.354 -0.707
## trn_fltr_cndhgh:mdlc 0.352 -0.354 -0.707 -0.354 -0.707
## trn_fltr_cndlw:mdlc 0.352 -0.354 -0.354 -0.707 -0.707
## trn_fltr_cndfll:mdlr 0.352 -0.707 -0.354 -0.354 -0.354
## trn_fltr_cndhgh:mdlr 0.352 -0.354 -0.707 -0.354 -0.354
## trn_fltr_cndlw:mdlr 0.352 -0.354 -0.354 -0.707 -0.354
## mdlrsl trn_fltr_cndfll:mdlc trn_fltr_cndhgh:mdlc
## trn_fltr_cndf
## trn_fltr_cndh
## trn_fltr_cndl
## modelcnn
## modelreslin
## trn_fltr_cndfll:mdlc -0.354
## trn_fltr_cndhgh:mdlc -0.354 0.500
## trn_fltr_cndlw:mdlc -0.354 0.500 0.500
## trn_fltr_cndfll:mdlr -0.707 0.500 0.250
## trn_fltr_cndhgh:mdlr -0.707 0.250 0.500
## trn_fltr_cndlw:mdlr -0.707 0.250 0.250
## trn_fltr_cndlw:mdlc trn_fltr_cndfll:mdlr
## trn_fltr_cndf
## trn_fltr_cndh
## trn_fltr_cndl
## modelcnn
## modelreslin
## trn_fltr_cndfll:mdlc
## trn_fltr_cndhgh:mdlc
## trn_fltr_cndlw:mdlc
## trn_fltr_cndfll:mdlr 0.250
## trn_fltr_cndhgh:mdlr 0.250 0.500
## trn_fltr_cndlw:mdlr 0.500 0.500
## trn_fltr_cndhgh:mdlr
## trn_fltr_cndf
## trn_fltr_cndh
## trn_fltr_cndl
## modelcnn
## modelreslin
## trn_fltr_cndfll:mdlc
## trn_fltr_cndhgh:mdlc
## trn_fltr_cndlw:mdlc
## trn_fltr_cndfll:mdlr
## trn_fltr_cndhgh:mdlr
## trn_fltr_cndlw:mdlr 0.500
# Pairwise contrasts between filter conditions within each model,
# multiplicity-adjusted with the multivariate-t ("mvt") method
# (6 tests per model, as reported in the output below).
baseline_pairwise <-
emmeans(baseline_mdl, pairwise ~ train_filter_cond | model, adjust = "mvt")
baseline_pairwise
## $emmeans
## model = lstm:
## train_filter_cond emmean SE df lower.CL upper.CL
## baseline 0.0270 0.00193 62.5 0.0232 0.0309
## full 0.5827 0.00193 62.5 0.5788 0.5865
## high 0.5846 0.00193 62.5 0.5807 0.5884
## low 0.5915 0.00193 62.5 0.5876 0.5953
##
## model = cnn:
## train_filter_cond emmean SE df lower.CL upper.CL
## baseline 0.0312 0.00193 62.5 0.0274 0.0351
## full 0.6286 0.00193 62.5 0.6248 0.6325
## high 0.6104 0.00193 62.5 0.6065 0.6142
## low 0.6122 0.00193 62.5 0.6083 0.6160
##
## model = reslin:
## train_filter_cond emmean SE df lower.CL upper.CL
## baseline 0.0255 0.00193 62.5 0.0216 0.0293
## full 0.4977 0.00193 62.5 0.4938 0.5015
## high 0.4662 0.00193 62.5 0.4624 0.4701
## low 0.4757 0.00193 62.5 0.4718 0.4795
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## model = lstm:
## contrast estimate SE df t.ratio p.value
## baseline - full -0.55566 0.00192 689 -289.969 <.0001
## baseline - high -0.55757 0.00192 689 -290.966 <.0001
## baseline - low -0.56446 0.00192 689 -294.561 <.0001
## full - high -0.00191 0.00192 689 -0.997 0.7510
## full - low -0.00880 0.00192 689 -4.592 <.0001
## high - low -0.00689 0.00192 689 -3.595 0.0021
##
## model = cnn:
## contrast estimate SE df t.ratio p.value
## baseline - full -0.59741 0.00192 689 -311.759 <.0001
## baseline - high -0.57915 0.00192 689 -302.232 <.0001
## baseline - low -0.58098 0.00192 689 -303.183 <.0001
## full - high 0.01826 0.00192 689 9.527 <.0001
## full - low 0.01643 0.00192 689 8.576 <.0001
## high - low -0.00182 0.00192 689 -0.951 0.7770
##
## model = reslin:
## contrast estimate SE df t.ratio p.value
## baseline - full -0.47219 0.00192 689 -246.411 <.0001
## baseline - high -0.44076 0.00192 689 -230.011 <.0001
## baseline - low -0.45018 0.00192 689 -234.927 <.0001
## full - high 0.03143 0.00192 689 16.400 <.0001
## full - low 0.02201 0.00192 689 11.484 <.0001
## high - low -0.00942 0.00192 689 -4.916 <.0001
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: mvt method for 6 tests
# Publication table of the baseline pairwise contrasts: statistics rounded
# to 3 decimals, adjusted p values formatted with scales::pvalue().
table_1 <-
  tidy(baseline_pairwise$contrasts) %>%
  select(model, contrast, estimate:adj.p.value) %>%
  mutate(adj.p.value = scales::pvalue(adj.p.value)) %>%
  mutate(across(estimate:statistic, ~ round(.x, digits = 3)))
# Combine the pairwise contrasts from the three check points (CP1-CP3) into
# one table. full_join() without `by` joins on all shared columns (see the
# messages below); since check_point is among the keys and always differs
# between inputs, no rows match and the joins simply stack the rows.
table_2 <-
pairwise_1214$contrasts %>% tidy() %>% mutate(check_point = "CP1") %>%
full_join(pairwise_1719$contrasts %>% tidy() %>% mutate(check_point = "CP2")) %>%
full_join(pairwise_2729$contrasts %>% tidy() %>% mutate(check_point = "CP3")) %>%
select(check_point, model, type_name, contrast, estimate:p.value) %>%
rename(data_set = "type_name") %>%
mutate(across(estimate:statistic, ~ round(.x, 3)),
p.value = scales::pvalue(p.value))
## Joining with `by = join_by(type_name, model, term, contrast, null.value,
## estimate, std.error, df, statistic, p.value, check_point)`
## Joining with `by = join_by(type_name, model, term, contrast, null.value,
## estimate, std.error, df, statistic, p.value, check_point)`
# Table exports kept disabled; uncomment to regenerate the Excel files.
#write_xlsx(table_1, path = "plots/table_1.xlsx")
#write_xlsx(table_2, path = "plots/table_2.xlsx")
# Save the composite three-stage figure defined earlier.
ggsave("plots/three stages.pdf", three_stages_plot, width = 8, height = 7)
# model 3 (grid_y) * train_filter_cond 2 (grid_x) * type_name 3 (grid_x) * preepoch_cond 17 (line) * epoch 121 (x) = 37026 entries
# Per-epoch mean accuracy with 95% t-based confidence intervals for every
# pre-training-length condition.
preepoch_sum <-
  accstage %>%
  group_by(model, train_filter_cond, type_name, preepoch_cond, epoch) %>%
  summarise(mean = mean(acc),
            # formatted percentage string for display in tables
            show_mean = paste0(format(round(100*mean, digits = 2), nsmall = 2), "%"),
            n = n(),
            sd = sd(acc),
            se = sd / sqrt(n),
            # two-sided 95% critical t value with n - 1 df
            t = qt((0.95)/2 + 0.5, n-1),
            ci_min = mean - t * se,
            ci_max = mean + t * se,
            # "drop_last" is summarise()'s default grouping behaviour;
            # naming it silences the "has grouped output" message
            .groups = "drop_last")
preepoch_sum
# Learning curves (mean accuracy + 95% CI ribbon) per pre-training length,
# faceted model x (data set, filter condition).
preepoch_learning_plot <-
  ggplot(preepoch_sum, aes(x = epoch)) +
  geom_line(aes(y = mean, color = preepoch_cond)) +
  geom_ribbon(aes(ymin = ci_min, ymax = ci_max, fill = preepoch_cond),
              alpha = 0.25) +
  facet_grid(model ~ interaction(type_name, train_filter_cond)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
  labs(x = "epochs", y = "accuracy rates",
       color = "conditions", fill = "conditions")
preepoch_learning_plot
# model 3 * type_name 2 * preepoch_cond 17 * run 20 * epoch 121 = 246840 entries
# Epoch-to-epoch accuracy changes (previous five epochs, not ten as the old
# comment claimed) plus the epoch counted from the end of pre-training.
# NOTE(review): lag() relies on rows being ordered by epoch within each
# group -- confirm the upstream data is sorted.
accstage_slope <-
  accstage %>%
  # low filter condition, validation sets only
  filter(train_filter_cond == "low", type_name != "train") %>%
  group_by(model, train_filter_cond, type_name, preepoch_cond, run) %>%
  # acc_inc_k is the one-epoch accuracy change observed k - 1 epochs earlier:
  # lag(acc, k - 1) - lag(acc, k)
  mutate(acc_inc = acc - lag(acc),
         acc_inc_2 = lag(acc, 1) - lag(acc, 2),
         acc_inc_3 = lag(acc, 2) - lag(acc, 3),
         acc_inc_4 = lag(acc, 3) - lag(acc, 4),
         acc_inc_5 = lag(acc, 4) - lag(acc, 5),
         # epochs elapsed since the end of pre-training; preepoch_cond is a
         # factor, so go through as.character to recover its numeric label
         post_epoch = as.numeric(epoch) - as.numeric(as.character(preepoch_cond)))
accstage_slope
# model 3 * type_name 2 * preepoch_cond 17 * run 20 = 2040 entries
# First post-pre-training epoch at which each run's accuracy has stabilized:
# three consecutive one-epoch changes all within +/- slope_2 (threshold
# defined earlier in the file; the original comment quoted 0.01).
accstage_stable <-
  accstage_slope %>%
  group_by(model, train_filter_cond, type_name, preepoch_cond, run) %>%
  # discard stage 1 (pre-training) epochs
  filter(post_epoch >= 0) %>%
  # keep epochs whose last three one-epoch changes all lie within the band;
  # abs() is equivalent to the paired -slope_2 <= x, x <= slope_2 checks
  filter(abs(acc_inc) <= slope_2,
         abs(acc_inc_2) <= slope_2,
         abs(acc_inc_3) <= slope_2) %>%
  # earliest stabilizing epoch per group; min() does not depend on row
  # order, unlike the original first()
  filter(epoch == min(epoch))
accstage_stable
# model 3 * type_name 2 * preepoch_cond 17 = 102 entries
# Mean number of post-pre-training epochs to convergence, with 95% t-based
# confidence intervals.
stable_sum <-
  accstage_stable %>%
  group_by(model, train_filter_cond, type_name, preepoch_cond) %>%
  summarise(mean = mean(post_epoch),
            n = n(),
            sd = sd(post_epoch),
            se = sd / sqrt(n),
            # two-sided 95% critical t value with n - 1 df
            t = qt((0.95)/2 + 0.5, n-1),
            ci_min = mean - t * se,
            ci_max = mean + t * se,
            # "drop_last" is summarise()'s default grouping behaviour;
            # naming it silences the "has grouped output" message
            .groups = "drop_last")
stable_sum
# Wide table of mean epochs-to-convergence: one row per model x condition x
# data set, one column per pre-training length.
preepoch_speed_table <-
stable_sum %>%
select(model, train_filter_cond, type_name, preepoch_cond, mean) %>%
# NOTE(review): as.integer() truncates toward zero (13.9 -> 13); if the
# nearest whole epoch is intended, round() would be the right call -- confirm
mutate(mean = as.integer(mean)) %>%
pivot_wider(names_from = preepoch_cond, values_from = mean)
preepoch_speed_table
# Convergence speed by pre-training length: mean epochs-to-convergence
# (points joined by a single path) with 95% CI error bars, faceted
# model x (data set, filter condition).
preepoch_speed_plot <-
  ggplot(stable_sum, aes(x = preepoch_cond, y = mean)) +
  geom_point(aes(color = preepoch_cond)) +
  geom_path(group = 1) +
  geom_errorbar(aes(ymin = ci_min, ymax = ci_max, color = preepoch_cond)) +
  facet_grid(model ~ interaction(type_name, train_filter_cond)) +
  scale_y_continuous(breaks = seq(0, 30, by = 5)) +
  labs(x = "condition",
       y = "average number of epochs to reach convergence",
       color = "condition")
preepoch_speed_plot
# focus preepoch_cond: the pre-training length compared against 0 below
focus_cond <- 15
# enumerate the model x type_name panels (3 models x 2 data sets = 6 groups,
# as the printed output below confirms)
groups <- unique(paste(accstage_stable$model, accstage_stable$type_name, sep = "_"))
groups
## [1] "reslin_target testing" "reslin_full testing" "lstm_target testing"
## [4] "lstm_full testing" "cnn_target testing" "cnn_full testing"
# For each of the six model x data-set groups (low filter condition), fit a
# linear model comparing convergence speed between no pre-training
# (preepoch_cond == 0) and the focus pre-training length.
# NOTE(review): splitting the group label on "_" assumes neither model nor
# type_name contains an underscore -- true for the values printed above.
lapply(groups, function(group) {
lm(post_epoch ~ preepoch_cond,
data = accstage_stable %>%
filter(model == str_split(group, "_")[[1]][1],
train_filter_cond == "low",
type_name == str_split(group, "_")[[1]][2],
# preepoch_cond is a factor; == against a number compares its label,
# so level "0" matches 0 (the output above shows this fit succeeds)
preepoch_cond == 0 | preepoch_cond == focus_cond)) %>%
summary()
})
## [[1]]
##
## Call:
## lm(formula = post_epoch ~ preepoch_cond, data = accstage_stable %>%
## filter(model == str_split(group, "_")[[1]][1], train_filter_cond ==
## "low", type_name == str_split(group, "_")[[1]][2], preepoch_cond ==
## 0 | preepoch_cond == focus_cond))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.500 -2.800 -1.025 3.462 10.450
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.5000 0.9787 13.794 2.25e-16 ***
## preepoch_cond15 -2.9500 1.3841 -2.131 0.0396 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.377 on 38 degrees of freedom
## Multiple R-squared: 0.1068, Adjusted R-squared: 0.08328
## F-statistic: 4.543 on 1 and 38 DF, p-value: 0.03958
##
##
## [[2]]
##
## Call:
## lm(formula = post_epoch ~ preepoch_cond, data = accstage_stable %>%
## filter(model == str_split(group, "_")[[1]][1], train_filter_cond ==
## "low", type_name == str_split(group, "_")[[1]][2], preepoch_cond ==
## 0 | preepoch_cond == focus_cond))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.500 -2.800 -1.025 3.462 10.450
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.5000 0.9787 13.794 2.25e-16 ***
## preepoch_cond15 -2.9500 1.3841 -2.131 0.0396 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.377 on 38 degrees of freedom
## Multiple R-squared: 0.1068, Adjusted R-squared: 0.08328
## F-statistic: 4.543 on 1 and 38 DF, p-value: 0.03958
##
##
## [[3]]
##
## Call:
## lm(formula = post_epoch ~ preepoch_cond, data = accstage_stable %>%
## filter(model == str_split(group, "_")[[1]][1], train_filter_cond ==
## "low", type_name == str_split(group, "_")[[1]][2], preepoch_cond ==
## 0 | preepoch_cond == focus_cond))
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.450 -2.450 -0.325 2.550 6.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.450 0.722 17.244 <2e-16 ***
## preepoch_cond15 -2.250 1.021 -2.204 0.0337 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.229 on 38 degrees of freedom
## Multiple R-squared: 0.1133, Adjusted R-squared: 0.08998
## F-statistic: 4.856 on 1 and 38 DF, p-value: 0.03368
##
##
## [[4]]
##
## Call:
## lm(formula = post_epoch ~ preepoch_cond, data = accstage_stable %>%
## filter(model == str_split(group, "_")[[1]][1], train_filter_cond ==
## "low", type_name == str_split(group, "_")[[1]][2], preepoch_cond ==
## 0 | preepoch_cond == focus_cond))
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.450 -2.450 -0.325 2.550 6.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.450 0.722 17.244 <2e-16 ***
## preepoch_cond15 -2.250 1.021 -2.204 0.0337 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.229 on 38 degrees of freedom
## Multiple R-squared: 0.1133, Adjusted R-squared: 0.08998
## F-statistic: 4.856 on 1 and 38 DF, p-value: 0.03368
##
##
## [[5]]
##
## Call:
## lm(formula = post_epoch ~ preepoch_cond, data = accstage_stable %>%
## filter(model == str_split(group, "_")[[1]][1], train_filter_cond ==
## "low", type_name == str_split(group, "_")[[1]][2], preepoch_cond ==
## 0 | preepoch_cond == focus_cond))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.60 -1.60 -0.55 1.45 9.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.600 0.838 16.228 <2e-16 ***
## preepoch_cond15 -3.050 1.185 -2.573 0.0141 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.748 on 38 degrees of freedom
## Multiple R-squared: 0.1484, Adjusted R-squared: 0.126
## F-statistic: 6.623 on 1 and 38 DF, p-value: 0.01409
##
##
## [[6]]
##
## Call:
## lm(formula = post_epoch ~ preepoch_cond, data = accstage_stable %>%
## filter(model == str_split(group, "_")[[1]][1], train_filter_cond ==
## "low", type_name == str_split(group, "_")[[1]][2], preepoch_cond ==
## 0 | preepoch_cond == focus_cond))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.60 -1.60 -0.55 1.45 9.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.600 0.838 16.228 <2e-16 ***
## preepoch_cond15 -3.050 1.185 -2.573 0.0141 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.748 on 38 degrees of freedom
## Multiple R-squared: 0.1484, Adjusted R-squared: 0.126
## F-statistic: 6.623 on 1 and 38 DF, p-value: 0.01409